Loading data
# Read the long-form OECD deficit (% of GDP) extract into a tibble.
df_deficit_pct <- read_csv(file = "data/deficit_pct_gdp_OECD.csv")
Creating a wide-form dataset with columns for each year. There are 23 rows with flag E (estimate/preliminary data) and 1 row with flag B (break in series); these were put in table df_e_or_b.
# Tally how many observations carry each flag code (NA = unflagged).
count(df_deficit_pct, `Flag Codes`)
## # A tibble: 3 x 2
## `Flag Codes` n
## <chr> <int>
## 1 B 1
## 2 E 23
## 3 <NA> 1116
# Keep the flagged rows (E = estimate/preliminary, B = break in series) in
# their own table, df_e_or_b, so they stay available for inspection, then
# print them.
df_e_or_b <- df_deficit_pct %>%
  filter(!is.na(`Flag Codes`))
df_e_or_b
## # A tibble: 24 x 8
## LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME deficit_pc `Flag Codes`
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 FIN GGNLEND TOT PC_GDP A 1975 4.93 E
## 2 FIN GGNLEND TOT PC_GDP A 1976 7.61 E
## 3 FIN GGNLEND TOT PC_GDP A 1977 6.25 E
## 4 FIN GGNLEND TOT PC_GDP A 1978 3.83 E
## 5 FIN GGNLEND TOT PC_GDP A 1979 3.40 E
## 6 FRA GGNLEND TOT PC_GDP A 2018 -2.29 E
## 7 FRA GGNLEND TOT PC_GDP A 2019 -3.01 E
## 8 GRC GGNLEND TOT PC_GDP A 2010 -11.3 B
## 9 GRC GGNLEND TOT PC_GDP A 2018 1.02 E
## 10 GRC GGNLEND TOT PC_GDP A 2019 1.52 E
## # … with 14 more rows
# Reshape to wide form: one column per TIME value, filled with deficit_pc.
# `Flag Codes` is dropped first; pivot_wider() treats every remaining column
# as an identifier, so keeping it would put flagged observations on
# separate rows.
df_wide_deficit <- pivot_wider(
  select(df_deficit_pct, -`Flag Codes`),
  names_from = TIME,
  values_from = deficit_pc
)
Creating a comparators variable
# LOCATION codes of the comparator countries highlighted in later plots.
comparators <- c("DNK", "IRL", "NZL", "NOR", "SWE")
# Flag the rows that belong to a comparator country. `%in%` already returns
# a logical vector without NAs, so no if_else() wrapper is needed.
df_deficit_pct <- df_deficit_pct %>%
  mutate(in_comparators = LOCATION %in% comparators)
Copying deficit_pc into a new variable, balance_pc (the budget balance)
# Keep an untouched copy of the original series as balance_pc before
# deficit_pc has its sign flipped in the following step.
df_deficit_pct <- mutate(df_deficit_pct, balance_pc = deficit_pc)
Transforming deficit_pc (currently a measure of the budget balance, where positive values mean surpluses) into a measure of deficit (higher values mean higher deficits)
# Flip the sign in place so that larger deficit_pc values mean larger
# deficits.
# NOTE(review): re-running this statement flips the sign back, so it must be
# executed exactly once per session.
df_deficit_pct <- mutate(df_deficit_pct, deficit_pc = -deficit_pc)
Creating rolling averages over 5, 7 and 10 years. These variable names (bal_*) will be changed, as they are no longer rolling averages of the balance but of the actual deficit.
# Centred rolling means of deficit_pc, computed separately for each LOCATION
# over windows of 5, 7 and 10 rows; positions without a full window are NA.
# Works on the long-form table.
# NOTE(review): assumes rows are already in chronological order within each
# LOCATION (nothing here sorts by TIME) -- confirm against the input file.
df_deficit_pct <- df_deficit_pct %>%
  group_by(LOCATION) %>%
  mutate(
    bal_05ya = rollmean(deficit_pc, k = 5, fill = NA, align = "center"),
    bal_07ya = rollmean(deficit_pc, k = 7, fill = NA, align = "center"),
    bal_10ya = rollmean(deficit_pc, k = 10, fill = NA, align = "center")
  ) %>%
  ungroup() # drop the LOCATION grouping again
Creating different measures of rolling minimum deficits
Creating rolling minimums over 3 and 5 years. This is the minimum deficit in rolling 3- and 5-year periods, including the global financial crisis (GFC) years.
# Right-aligned rolling minima of deficit_pc per LOCATION: each value is the
# smallest deficit among the 5 (or 3) rows ending at the current row. Rows
# without a full window behind them are NA. Works on the long-form table.
df_deficit_pct <- df_deficit_pct %>%
  group_by(LOCATION) %>%
  mutate(
    min_05y = rollapply(
      deficit_pc, width = 5, FUN = min, fill = NA, align = "right"
    ),
    min_03y = rollapply(
      deficit_pc, width = 3, FUN = min, fill = NA, align = "right"
    )
  ) %>%
  ungroup()
# Copy of the table with the GFC years (2009-2013) removed.
df_deficit_no_GFC <- filter(df_deficit_pct, !(TIME %in% 2009:2013))
Creating rolling minimums over 3 and 5 years. This is the minimum deficit in rolling 3- and 5-year periods, excluding the GFC years.
# Same right-aligned rolling minima as min_05y / min_03y, recomputed on the
# table that has the 2009-2013 rows removed.
# NOTE(review): the rows on either side of the removed span are now adjacent,
# so windows near the gap combine pre- and post-GFC years and cover more than
# 5 (or 3) calendar years -- confirm this is intended.
df_deficit_no_GFC <- df_deficit_no_GFC %>%
  group_by(LOCATION) %>%
  mutate(
    min_05y_no_GFC = rollapply(
      deficit_pc, width = 5, FUN = min, fill = NA, align = "right"
    ),
    min_03y_no_GFC = rollapply(
      deficit_pc, width = 3, FUN = min, fill = NA, align = "right"
    )
  ) %>%
  ungroup()
Convoluted attempt at creating maximum values
Creating maximum values of 5-rolling minimum deficits
# One row per LOCATION: the largest of its 5-row rolling minima (GFC years
# included), ignoring NAs.
# NOTE(review): a LOCATION whose min_05y is entirely NA gets -Inf (plus a
# warning) from max(..., na.rm = TRUE).
max_min_5 <- summarise(
  group_by(df_deficit_pct, LOCATION),
  max_5 = max(min_05y, na.rm = TRUE)
)
Convoluted attempt at creating maximum values
Creating maximum values of 5-rolling minimums without GFC years
# One row per LOCATION: the largest of its 5-row rolling minima computed
# without the GFC years, ignoring NAs. The result column is called max_5_GFC
# even though the GFC years are excluded.
# NOTE(review): a LOCATION whose min_05y_no_GFC is entirely NA gets -Inf
# (plus a warning) from max(..., na.rm = TRUE).
max_min_5_no_GFC <- summarise(
  group_by(df_deficit_no_GFC, LOCATION),
  max_5_GFC = max(min_05y_no_GFC, na.rm = TRUE)
)
Creating maximum values of 5-rolling minimums without GFC years and keeping the information of all the other variables
# For each LOCATION keep the full row (year and all other columns) where the
# no-GFC 5-row rolling minimum is largest. The table is sorted once in
# descending order (NAs last), then the first row of every LOCATION is taken.
max_5ymin <- df_deficit_no_GFC %>%
  arrange(desc(min_05y_no_GFC)) %>%
  group_by(LOCATION) %>%
  slice(1) %>%
  ungroup()
Creating maximum values of 5-rolling average without GFC years
# For each LOCATION keep the full row where the 5-year rolling average
# (bal_05ya) is largest among the non-GFC rows. The table is sorted once in
# descending order (NAs last), then the first row of every LOCATION is taken.
# NOTE(review): bal_05ya was computed on the full series before the GFC rows
# were removed, so averages for years next to 2009-2013 still include
# GFC-year values -- confirm this is intended.
max_5ya <- df_deficit_no_GFC %>%
  arrange(desc(bal_05ya)) %>%
  group_by(LOCATION) %>%
  slice(1) %>%
  ungroup()
deficit_pc and its rolling average recodings: Positive values mean budget deficits and negative values mean budget surpluses
Visualising balance as % of GDP for all countries over the years (positive values are surpluses, negative values are deficits)
# Line chart of the budget balance per country (positive = surplus,
# negative = deficit). deficit_pc has already had its sign flipped earlier in
# the script, so balance_pc -- the untouched copy of the original series --
# is the column that matches this plot's description.
p1 <- df_deficit_pct %>%
  ggplot(aes(TIME, balance_pc, colour = LOCATION)) +
  geom_line()
# Interactive version: plotly lets the legend be read while exploring the plot.
ggplotly(p1)
Visualising 5 year rolling averages of deficit as % of GDP for all countries
# 5-year rolling average of the deficit, one line per country.
p2 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = bal_05ya, colour = LOCATION)
) +
  geom_line()
ggplotly(p2)
Visualising 7 year rolling averages of deficit as % of GDP for all countries
# 7-year rolling average of the deficit, one line per country.
p3 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = bal_07ya, colour = LOCATION)
) +
  geom_line()
ggplotly(p3)
Visualising 10 year rolling averages of deficit as % of GDP for all countries
# 10-year rolling average of the deficit, one line per country.
p4 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = bal_10ya, colour = LOCATION)
) +
  geom_line()
ggplotly(p4)
Visualising 7 year rolling averages of deficit as % of GDP for all countries from 2000
# 7-year rolling average of the deficit, restricted to rows from 2000 onwards.
p5 <- ggplot(
  filter(df_deficit_pct, TIME >= 2000),
  aes(x = TIME, y = bal_07ya, colour = LOCATION)
) +
  geom_line()
ggplotly(p5)
Visualising 7 year rolling averages of deficit as % of GDP for all countries (higher values mean higher deficits)
# 7-year rolling average of the deficit, one line per country. This is the
# same chart specification as p3.
p6 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = bal_07ya, colour = LOCATION)
) +
  geom_line()
ggplotly(p6)
### Plot 7
Visualising 7 year rolling averages of deficit as % of GDP for all countries with highlighted comparators (higher values mean higher deficits)
# 7-year rolling average of the deficit. Lines are still drawn per country
# (group = LOCATION) but coloured by whether the country is a comparator.
p7 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = bal_07ya, colour = in_comparators, group = LOCATION)
) +
  geom_line()
ggplotly(p7)
Visualising surpluses as % of GDP for all countries with highlighted comparators (higher values mean higher surpluses)
# Original (un-flipped) balance series. Lines are drawn per country
# (group = LOCATION) and coloured by whether the country is a comparator.
p8 <- ggplot(
  df_deficit_pct,
  aes(x = TIME, y = balance_pc, colour = in_comparators, group = LOCATION)
) +
  geom_line()
ggplotly(p8)
Visualising maximum minimum deficits over 5 years including GFC years (haven’t figured out a way to keep the year information)
The highest minimum deficit including the financial crisis was Greece (9%)
# Bar per country: largest 5-row rolling minimum deficit, GFC years included.
p9 <- ggplot(max_min_5, aes(x = LOCATION, y = max_5)) +
  geom_col()
ggplotly(p9)
Visualising maximum minimum deficits over 5 years excluding GFC years
The rolling minimum was calculated excluding the GFC years
# Bar per country: largest 5-row rolling minimum deficit computed without the
# GFC years. TIME is mapped to `group`, presumably so that the year shows up
# in the plotly tooltip -- confirm.
p10 <- ggplot(
  max_5ymin,
  aes(x = LOCATION, y = min_05y_no_GFC, group = TIME)
) +
  geom_col()
ggplotly(p10)
# Bar per country: largest 5-year rolling average among the non-GFC rows,
# with TIME mapped to `group` in the same way.
p11 <- ggplot(
  max_5ya,
  aes(x = LOCATION, y = bal_05ya, group = TIME)
) +
  geom_col()
ggplotly(p11)
Exporting data to CSV
# Write the result tables to data_output/ (assumes the directory already
# exists -- TODO confirm). max_min_5, the GFC-inclusive summary, is not
# exported here.
# Full long-form table with all derived columns.
write_csv(df_deficit_pct, "data_output/deficit_dataset_complete.csv")
# Long-form table without the 2009-2013 rows.
write_csv(df_deficit_no_GFC, "data_output/deficit_dataset_noGFC.csv")
# Per-country maximum of the no-GFC 5-row rolling minimum.
write_csv(max_min_5_no_GFC, "data_output/max_min_deficit_table.csv")
# Per-country row holding the largest no-GFC 5-row rolling minimum.
write_csv(max_5ymin, "data_output/max_5ymin_table.csv")
# Per-country row holding the largest 5-year rolling average.
write_csv(max_5ya, "data_output/max_5ya_table.csv")